import os,json
import numpy as np
import jieba as jb

# Parse the JSON corpus and keep at most its first 1,200,000 elements.
# NOTE(review): presumably a flat sequence of pre-segmented words -- confirm
# against whatever produces AllCutted.json.
with open('./data/AllCutted.json','r',encoding='utf8') as f:
    data=json.load(f)[:1200000]

def del_stopwords(li,stops):
    """Remove every element of ``li`` that appears in ``stops``, in place.

    Args:
        li: Mutable list of hashable words; it is modified in place.
        stops: Iterable of hashable words to drop.

    Returns:
        The same list object ``li``, with the stopwords removed and the
        relative order of the remaining elements preserved.
    """
    # Build the lookup once: set membership is O(1), whereas testing against
    # a list and popping from the middle made the old loop quadratic.
    stop_set=set(stops)
    # Slice assignment keeps the caller's list object (in-place contract).
    li[:]=[wd for wd in li if wd not in stop_set]
    return li

# Collect stopwords from every file found under ./stopwords/ (os.walk
# descends into subdirectories): one entry per line, newline removed.
stops=[]
for root,_subdirs,names in os.walk('./stopwords/'):
    for fN in names:
        with open(root+'/'+fN,'r',encoding='utf8') as f:
            stops.extend([line.replace('\n','') for line in f])

# data=del_stopwords(data,stops)

print(len(data))
# Bigram table: module[word][next_word] is the number of times next_word
# immediately follows word in data. The final element of data only gets an
# entry of its own if it also occurs earlier in the sequence.
module={}
# Pair each element with its successor. Using the dict itself for the
# "seen before" check avoids scanning a separate, ever-growing list of keys
# for every token, which was quadratic in the vocabulary size.
for cur,nxt in zip(data,data[1:]):
    guess=module.setdefault(cur,{})
    guess[nxt]=guess.get(nxt,0)+1

def sortByKey(__dict):
    """Return a new dict with the entries of ``__dict`` in ascending key order.

    The input mapping is left untouched; the result relies on dicts
    preserving insertion order.
    """
    ordered={}
    for key in sorted(__dict.keys()):
        ordered[key]=__dict[key]
    return ordered

# Replace each word's follower table with a copy ordered by follower key.
# Only existing keys are reassigned, so the dict size never changes while
# it is being iterated.
for word,followers in module.items():
    module[word]=sortByKey(followers)

# Interactive lookup: segment the typed text with jieba, take its last word
# and print every word recorded as following it in the bigram table.
while True:
    try:
        msg=input('>_')
    except (EOFError,KeyboardInterrupt):
        # A bare ``except`` here used to swallow Ctrl-C and end-of-input,
        # making the loop impossible to leave (and spinning forever once
        # stdin was closed). Exit cleanly instead.
        print()
        break
    # To ignore stopwords, segment with
    # del_stopwords(jb.lcut(msg),stops) instead of jb.lcut(msg).
    words=jb.lcut(msg)
    # Empty input yields no words; an unseen word has no table entry.
    followers=module.get(words[-1]) if words else None
    if followers is None:
        print('未记录')
    else:
        print(list(followers.keys()))